In [2]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as ticker
import plotly.express as px
import plotly.graph_objs as go
In [3]:
# Load the video-game sales dataset from the sibling `datasets` directory.
# Forward slashes keep the relative path portable: they resolve on Windows as
# well as on Linux/macOS, whereas a backslash-separated path only works on
# Windows.
df = pd.read_csv('../datasets/vgsales.csv')
In [4]:
# Interactive stacked bar chart: sales per platform, split by region.

# Display name of each region -> sales column in `df`.
regions = {
    'North America': 'NA_Sales',
    'Europe': 'EU_Sales',
    'Japan': 'JP_Sales',
    'Other Regions': 'Other_Sales'
}

# Sum the sales of every region for each platform.
platform_sales = df.groupby('Platform')[list(regions.values())].sum()

# One bar trace per region; with barmode='stack' they pile up per platform.
fig = go.Figure()
for region, column in regions.items():
    fig.add_trace(go.Bar(
        x=platform_sales.index,
        y=platform_sales[column],
        name=region
    ))

# Drop-down menu.
# The first button is the one shown as active when the figure is first drawn.
# At that point every trace is visible, so the first entry must be the
# "all regions" view; it also gives a way back to the stacked chart after a
# single region has been selected.
all_regions_button = {
    'label': 'All Regions',
    'method': 'update',
    'args': [{'visible': [True] * len(regions)}]
}
# One button per region: only the trace built from that region's column
# stays visible (traces were added in the same order as `regions`).
single_region_buttons = [
    {
        'label': region,
        'method': 'update',
        'args': [{'visible': [column == other for other in regions.values()]}]
    }
    for region, column in regions.items()
]

fig.update_layout(
    updatemenus=[
        {
            'buttons': [all_regions_button] + single_region_buttons,
            'direction': 'down',
            'showactive': True,
        }
    ],
    # Year range aligned with the other charts of this notebook (1980-2020).
    title='Video Game Sales by Platform and Region (1980-2020)',
    xaxis_title='Platform',
    yaxis_title='Sales (millions)',
    barmode='stack',
)
fig.show()
In [5]:
# Interactive stacked bar chart: sales per genre, split by region.

# Display name of each region -> sales column in `df`.
regions = {
    'North America': 'NA_Sales',
    'Europe': 'EU_Sales',
    'Japan': 'JP_Sales',
    'Other Regions': 'Other_Sales'
}

# Sum the sales of every region for each genre.
genre_sales = df.groupby('Genre')[list(regions.values())].sum()

# One bar trace per region; with barmode='stack' they pile up per genre.
fig = go.Figure()
for region, column in regions.items():
    fig.add_trace(go.Bar(
        x=genre_sales.index,
        y=genre_sales[column],
        name=region
    ))

# Drop-down menu.
# The first button is the one shown as active when the figure is first drawn.
# At that point every trace is visible, so the first entry must be the
# "all regions" view; it also gives a way back to the stacked chart after a
# single region has been selected.
all_regions_button = {
    'label': 'All Regions',
    'method': 'update',
    'args': [{'visible': [True] * len(regions)}]
}
# One button per region: only the trace built from that region's column
# stays visible (traces were added in the same order as `regions`).
single_region_buttons = [
    {
        'label': region,
        'method': 'update',
        'args': [{'visible': [column == other for other in regions.values()]}]
    }
    for region, column in regions.items()
]

fig.update_layout(
    updatemenus=[
        {
            'buttons': [all_regions_button] + single_region_buttons,
            'direction': 'down',
            'showactive': True,
        }
    ],
    title='Video Game Sales by Genre and Region (1980-2020)',
    xaxis_title='Genre',
    yaxis_title='Sales (millions)',
    barmode='stack',
)
fig.show()
In [6]:
# Global sales per (Year, Genre), reshaped to one row per year and one column
# per genre. The column is selected before summing so that only Global_Sales
# is aggregated; year/genre combinations without any row become 0.
year_genre_sales = (
    df.groupby(['Year', 'Genre'])['Global_Sales']
    .sum()
    .unstack()
    .fillna(0)
)

# Stacked area chart (pandas plotting on top of Matplotlib).
# The axes are created explicitly and handed to pandas through `ax=`:
# without it DataFrame.plot opens a figure of its own, which leaves the
# figure sized here empty and the requested figsize unused.
fig, ax = plt.subplots(figsize=(12, 10))
year_genre_sales.plot(kind='area', stacked=True, colormap='tab20c', ax=ax)

# Customizing the plot
ax.set_title('Comparison of Global Sales by Genre Over the Years', fontsize=12, fontweight='bold')
ax.set_xlabel('Year', fontsize=10)
ax.set_ylabel('Global Sales (millions)', fontsize=10)
ax.grid(axis='y', linestyle='-', linewidth=0.7)  # Only horizontal gridlines
ax.legend(loc='upper left', bbox_to_anchor=(0, 1))
fig.tight_layout()
plt.show()
<Figure size 1200x1000 with 0 Axes>
In [7]:
# Top 10 best-selling games per region, drawn as a 2x2 grid of horizontal
# bar charts.

# Display name of each region -> sales column in `df`.
regions = {
    'North America': 'NA_Sales',
    'Europe': 'EU_Sales',
    'Japan': 'JP_Sales',
    'Other Regions': 'Other_Sales'
}

# For every region keep the ten rows with the largest value in that region's
# sales column, restricted to the columns used for labelling and plotting.
top_10_games = {
    region: df.nlargest(10, column)[['Rank', 'Name', 'Platform', column]]
    for region, column in regions.items()
}

# One panel per region.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))
for ax, (region, column) in zip(axes.ravel(), regions.items()):
    region_top = top_10_games[region]
    # Bar label: "<game name> (<platform>)".
    game_labels = region_top['Name'] + ' (' + region_top['Platform'] + ')'
    ax.barh(game_labels, region_top[column], color='skyblue')
    ax.set(
        title=f"Top 10 Games in {region} (1980-2020)",
        xlabel='Sales (millions)',
    )
    ax.grid(axis='x', linestyle='--')  # dashed vertical gridlines
    ax.grid(axis='y', linestyle='')    # empty line style: no horizontal gridlines drawn
    ax.invert_yaxis()                  # first (largest) entry at the top

plt.tight_layout()
plt.show()
In [8]:
# Best-selling genres per region (at most ten each), drawn as a 2x2 grid of
# horizontal bar charts.

# Display name of each region -> sales column in `df`.
regions = {
    'North America': 'NA_Sales',
    'Europe': 'EU_Sales',
    'Japan': 'JP_Sales',
    'Other Regions': 'Other_Sales'
}

# Per-genre sales totals for all regional columns, computed in one pass.
genre_totals = df.groupby('Genre')[list(regions.values())].sum()

# For every region keep the (up to) ten genres with the largest total,
# sorted from largest to smallest.
top_10_genres = {
    region: genre_totals[column].nlargest(10)
    for region, column in regions.items()
}

# One panel per region.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))
for ax, (region, column) in zip(axes.ravel(), regions.items()):
    region_top = top_10_genres[region]
    ax.barh(region_top.index, region_top.values, color='skyblue')
    ax.set(
        title=f"Top 10 Genres in {region} (1980-2020)",
        xlabel='Sales (millions)',
    )
    ax.grid(axis='x', linestyle='--')  # dashed vertical gridlines
    ax.grid(axis='y', linestyle='')    # empty line style: no horizontal gridlines drawn
    ax.invert_yaxis()                  # first (largest) entry at the top

plt.tight_layout()
plt.show()